/********************************************************************************
Paper: The Impact of Welfare on Intergroup Relations
Author: Akshay Dixit

Census/SHRUG: This .do file analyzes data from the 2011 census and 2013 economic survey to
produce Table S18 (comparison of villages along the Telangana/Andhra Pradesh border)
*******************************************************************************/

clear all

gl data "$identity/data"

cd "$data"

********************************************************************************

*** Import 2011 census data ***

import excel using "DH_2011_DCHB_Village_Release_2800.xlsx", sh(Village_Data_2800) firstrow clear
keep if DistrictName == "Mahbubnagar" | DistrictName == "Kurnool"

ren DistrictName district_name
ren SubDistrictName subdistrict_name
ren VillageName place_name

replace district_name = lower(district)
replace subdistrict_name = lower(subdistrict_name)
replace subdistrict_name = "c belagal" if subdistrict_name == "c.belagal"

save "ap_census_2011_village_facilities.dta", replace

********************************************************************************/

*** Import sample villages ***

import delimited using "sample_villages.csv", clear

********************************************************************************/

*** Merge with SHRUG data and Census data ***

replace subdistrict_name = "c belagal" if subdistrict_name == "c.belagal"
replace place_name = "srisailam project l f c" if place_name == "Srisailam Project (L.F.C)"

* Merge with SHRUG names
merge 1:m district_name subdistrict_name place_name using "$data/shrug/dta_shrug-v1.5.samosa-pop-econ-census-dta/shrug-v1.5.samosa-keys-dta/shrug_names.dta"

keep if _merge == 3
drop _merge

* Merge with SHRUG 2011 Census
merge 1:1 shrid using "$data/shrug/dta_shrug-v1.5.samosa-pop-econ-census-dta/shrug-v1.5.samosa-pop-econ-census-dta/shrug_pc11"

keep if _merge == 3
drop _merge

* Merge with SHRUG Econ Census
merge 1:1 shrid using "$data/shrug/dta_shrug-v1.5.samosa-pop-econ-census-dta/shrug-v1.5.samosa-pop-econ-census-dta/shrug_ec"

keep if _merge == 3
drop _merge

* Merge with 2011 Census data 
merge 1:m district_name subdistrict_name place_name using "ap_census_2011_village_facilities.dta"

keep if _merge == 3
drop _merge

********************************************************************************

*** Create and label variables to check for balance using 2011 census data ***
	
	// Share of SC, ST population
g sc_pop_share = pc11_pca_p_sc/pc11_pca_tot_p 
g st_pop_share = pc11_pca_p_st/pc11_pca_tot_p
g female_pop_share = TotalFemalePopulationofVilla/TotalPopulationofVillage
ren pc11_pca_no_hh no_hh

lab var sc_pop_share "Share of SC population"
lab var st_pop_share "Share of ST population"
lab var female_pop_share "Share of female population"
lab var no_hh "Number of households"

	// Employment
g ec13_emp_all_percapita = ec13_emp_all/pc11_pca_tot_p
g ec13_emp_manuf_percapita = ec13_emp_manuf/pc11_pca_tot_p
g ec13_emp_services_percapita = ec13_emp_services/pc11_pca_tot_p

lab var ec13_emp_all_percapita "Employment (share of population)"
lab var ec13_emp_manuf_percapita "Manufacturing employment (share of population)"
lab var ec13_emp_services_percapita "Services employment (share of population)"

ren ec13_emp_all_percapita emp_all
ren ec13_emp_manuf_percapita emp_manuf
ren ec13_emp_services_percapita emp_services

	// Literacy
g literate_share = pc11_pca_p_lit/pc11_pca_tot_p
lab var literate_share "Share of literate population"

	// Educational facilities
lab var pc11_vd_p_sch "Number of primary schools"
lab var pc11_vd_m_sch "Number of middle schools"
lab var pc11_vd_s_sch "Number of secondary schools"
lab var pc11_vd_s_s_sch "Number of senior secondary schools"

ren pc11_vd_p_sch p_sch
ren pc11_vd_m_sch m_sch
ren pc11_vd_s_sch s_sch
ren pc11_vd_s_s_sch s_s_sch

	// Services and public goods
g tap_water_treated = (TapWaterTreatedStatusA1N == 1 & TapWaterTreatedFunctioningAl == 1)
g river_canal_status = (RiverCanalStatusA1NA2 == 1)
g all_weather_road = (AllWeatherRoadStatusA1NA == 1)
g public_bus = (PublicBusServiceStatusA1 == 1)
g closed_drainage = (ClosedDrainageStatusA1NA == 1)
g mobile_phone_coverage = (MobilePhoneCoverageStatusA == 1)
g bank = (CommercialBankStatusA1NA == 1 | CooperativeBankStatusA1NA == 1)
g pds = (PublicDistributionSystemPDS == 1)
g nutrition_center = (NutritionalCentresICDSStatus == 1 | NutritionalCentresAnganwadiCe == 1)
g asha = (ASHAStatusA1NA2 == 1)
ren pc11_vd_power_agr_sum power_sum
ren pc11_vd_power_agr_win power_win

lab var pds "Public Distribution System"
lab var nutrition_center "Nutrition Center (ICDS or Anganwadi)"
lab var asha "Community health workers (ASHA)"

lab var tap_water_treated "Tap water (treated)"
lab var river_canal_status "River/canal access"
lab var all_weather_road "All weather road"
lab var public_bus "Public bus"
lab var closed_drainage "Closed drainage"
lab var mobile_phone_coverage "Mobile phone coverage"
lab var bank "Bank (commercial or cooperative)"

	// Agriculture
g tractors = (TractorsStatusA1NA2 == 1)
g mandi = (MandisRegularMarketStatusA == 1)
g weekly_haat = (WeeklyHaatStatusA1NA2 == 1)
g sown_area = NetAreaSowninHectares/TotalGeographicalAreainHect
g unirrigated_area = TotalUnirrigatedLandAreain/TotalGeographicalAreainHect

lab var sown_area "Net Area Sown (proportion of total area)"
lab var unirrigated_area "Unirrigated area (proportion of total area)"
lab var tractors "Tractors available"
lab var mandi "Mandi (regular market)"
lab var weekly_haat "Haat (weekly market)"

	// Social groups
g shg = (SelfHelpGroupSHGStatus == 1)
g library = (PublicReadingRoomStatusA1 == 1 | PublicLibraryStatusA1NA2 == 1)
g community_center = (CommunityCentrewithwithoutTV == 1)

lab var shg "Self Help Group"
lab var library "Public library or reading room"
lab var community_center "Community center" 

********************************************************************************

*** Run regressions and export table ***

putexcel set "$analysis/balance_check.xlsx", replace 
putexcel B1=("(1)") C1=("(2)") D1=("(3)") E1=("(4)") F1=("(5)") G1=("(6)")
putexcel C2=("TS mean") D2=("AP mean") E2=("Difference") F2=("p-value") G2=("N")

local balance sc_pop_share st_pop_share female_pop_share literate_share shg no_hh sown_area unirrigated_area emp_all emp_manuf emp_services all_weather_road public_bus closed_drainage tap_water_treated river_canal_status mobile_phone_coverage power_sum power_win bank pds nutrition_center asha library community_center
local row = 3
local counter = 1

foreach var of local balance {
	
	di "`var'"
	
	sleep 2000
	local varlabel : var label `var'
	
		// TS mean
	qui sum `var' if telangana == 1
	local ts_mean = (r(mean))
	
		// AP mean
	qui sum `var' if telangana == 0
	local ap_mean = (r(mean))
	
		// Regression
	qui reg `var' telangana, vce(robust) 
	local p = 2*ttail(e(df_r), abs(_b[telangana]/_se[telangana]))
	local difference = _b[telangana]
	
		// Output
	putexcel A`row' = ("(`counter')")
	putexcel B`row' = ("`varlabel'")
	putexcel C`row' = (trim("`: display %10.2f `ts_mean''"))
	putexcel D`row' = (trim("`: display %10.2f `ap_mean''"))
	putexcel E`row' = ((_b[telangana]))
	putexcel F`row' = (`p')
	putexcel G`row' = (e(N))
	
	local ++row
	local ++counter
	
}
********************************************************************************

clear


